package com.example.djlocrspringboot.djlocr;

import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.example.djlocrspringboot.djlocr.djl.OcrV3RecognitionExample;
import com.itextpdf.text.Document;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;

/**
 * Copyright © XXX. All rights reserved.
 *
 * <p>PDF helper component. It renders PDF pages to images, feeds them to the injected
 * OCR recognizer, and offers static utilities for file/byte conversion, merging PDFs,
 * extracting embedded text with iText and scoring the similarity of two texts.
 *
 * @Title PDFSimilarityComparator
 * @Project: Hyperledger-Fabric-V1-Test
 * @Package: com.example.djlocrspringboot.djlocr
 * @Description: PDF rendering, OCR and text-similarity helpers
 * @Author: liyl
 * @Date: 2023/5/19 15:26
 * @version:
 */
@Component
public class PDFSimilarityComparator {

    /** Recognizer used to OCR a single page image; injected by Spring. */
    @Autowired
    private OcrV3RecognitionExample ocrV3RecognitionExample;

    /**
     * Renders every page of a PDF as a JPEG, OCRs the pages one after another and
     * returns the recognized text of all pages concatenated in page order.
     *
     * @param bytes        raw PDF content
     * @param dpi          render resolution; {@code null} or {@code 0} selects the default
     *                     resolution of {@link #pdf2Image(byte[], String, float)}
     * @param threadNumber currently unused; pages are processed sequentially
     * @return the OCR text of all pages, without any separator between pages
     * @throws RuntimeException wrapping any failure during rendering or recognition
     */
    public String testSpringBootDJLOcr(byte[] bytes, Integer dpi, Integer threadNumber) {
        try {
            System.out.println("dpi:" + dpi);
            // A null Integer would blow up on auto-unboxing; treat it like 0 ("use default").
            float renderDpi = (dpi == null) ? 0.0f : dpi.floatValue();
            List<Pdf2ImgDTO> pages = pdf2Image(bytes, "jpg", renderDpi);
            StringBuilder text = new StringBuilder();
            for (Pdf2ImgDTO page : pages) {
                text.append(ocrV3RecognitionExample.djlOcr(page.getPdfByte()));
            }
            return text.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads a whole file into memory.
     *
     * @param path path of the file to read
     * @return the complete file content
     * @throws Exception if the file cannot be opened or read
     */
    private static byte[] fileToByte(String path) throws Exception {
        // try-with-resources closes the file even when a read fails midway.
        try (FileInputStream in = new FileInputStream(new File(path));
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return out.toByteArray();
        }
    }

    /**
     * Writes a byte array to a file, replacing any existing content.
     *
     * <p>This is a best-effort helper: I/O failures are printed to standard error and
     * not propagated, so callers cannot tell from the outside whether the write succeeded.
     *
     * @param byteArray  bytes to write; must not be {@code null}
     * @param targetPath path of the target file; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     */
    public static void byteArrayToFile(byte[] byteArray, String targetPath) {
        // Fail before touching the file system, so a null payload never truncates the target.
        if (byteArray == null) {
            throw new NullPointerException("byteArray");
        }
        File target = new File(targetPath);
        try (FileOutputStream out = new FileOutputStream(target)) {
            out.write(byteArray);
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders every page of a PDF to an image.
     *
     * @param pdfByte   raw PDF content
     * @param imageType ImageIO format name of the output images (for example {@code "jpg"})
     * @param dpi       render resolution; {@code 0} selects the default of 144 DPI
     * @return one DTO per page, in page order, carrying the 1-based page number, the
     *         encoded image bytes of that page only, and the image type
     * @throws Exception if the PDF cannot be loaded or rendered, or if no ImageIO writer
     *                   exists for {@code imageType}; the underlying {@link IOException}
     *                   is attached as the cause
     */
    public static List<Pdf2ImgDTO> pdf2Image(byte[] pdfByte, String imageType, float dpi) throws Exception {
        // Default noted by the original author as the Windows native DPI.
        final float renderDpi = (dpi == 0.0f) ? 144.0f : dpi;
        List<Pdf2ImgDTO> pages = new ArrayList<>();
        try (PDDocument doc = PDDocument.load(pdfByte)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, renderDpi);
                // A fresh buffer per page: reusing one buffer would make each page's bytes
                // also contain the images of all previous pages.
                ByteArrayOutputStream pageBytes = new ByteArrayOutputStream();
                // ImageIO.write reports a missing writer by returning false, not by throwing.
                if (!ImageIO.write(image, imageType, pageBytes)) {
                    throw new IOException("No ImageIO writer available for image type: " + imageType);
                }
                Pdf2ImgDTO page = new Pdf2ImgDTO();
                page.setPageNumber(i + 1);
                page.setPdfByte(pageBytes.toByteArray());
                page.setImageType(imageType);
                pages.add(page);
            }
            return pages;
        } catch (IOException e) {
            // Keep the historical exception type for callers, but preserve the cause.
            throw new Exception(e.getMessage(), e);
        }
    }

    /**
     * Merges several PDFs into one, appending all pages of each input in list order.
     *
     * @param artifactTemplate the PDFs to merge, as raw bytes; must contain at least one entry
     * @return the merged PDF
     * @throws IllegalArgumentException if {@code artifactTemplate} is {@code null} or empty
     * @throws Exception                if any input cannot be parsed or copied
     */
    public static byte[] mergePdfFiles(List<byte[]> artifactTemplate) throws Exception {
        if (artifactTemplate == null || artifactTemplate.isEmpty()) {
            throw new IllegalArgumentException("artifactTemplate must contain at least one PDF");
        }

        // The output document starts with the page size of the first page of the first input.
        Document document;
        PdfReader firstReader = new PdfReader(artifactTemplate.get(0));
        try {
            document = new Document(firstReader.getPageSize(1));
        } finally {
            firstReader.close();
        }

        ByteArrayOutputStream merged = new ByteArrayOutputStream();
        try {
            PdfCopy copy = new PdfCopy(document, merged);
            document.open();
            for (byte[] pdfBytes : artifactTemplate) {
                PdfReader reader = new PdfReader(pdfBytes);
                try {
                    int pageCount = reader.getNumberOfPages();
                    for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
                        document.newPage();
                        PdfImportedPage page = copy.getImportedPage(reader, pageNo);
                        copy.addPage(page);
                    }
                    // Let the writer release what it holds for this reader before closing it.
                    copy.freeReader(reader);
                } finally {
                    reader.close();
                }
            }
        } catch (Exception e) {
            // Still close the document, but never let a close failure hide the real error.
            try {
                document.close();
            } catch (RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        document.close();
        return merged.toByteArray();
    }

    /**
     * Extracts the embedded text of all pages of a PDF file using iText.
     *
     * @param file path of the PDF file
     * @return the text of every page in page order, pages separated by a line feed
     * @throws IOException if the file cannot be read or parsed
     */
    private static String extractTextFromPDFItext(String file) throws IOException {
        PdfReader reader = new PdfReader(file);
        try {
            int totalPages = reader.getNumberOfPages();
            StringBuilder text = new StringBuilder();
            for (int i = 1; i <= totalPages; i++) {
                if (i > 1) {
                    text.append('\n');
                }
                text.append(PdfTextExtractor.getTextFromPage(reader, i));
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Scores the similarity of two texts by delegating to
     * {@code CosineSimilarity.getSimilarity}.
     *
     * @param text1 first text
     * @param text2 second text
     * @return the similarity value reported by {@code CosineSimilarity}
     */
    private static double calculateCosineSimilarity(String text1, String text2) {
        return CosineSimilarity.getSimilarity(text1, text2);
    }
}
